{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [],
   "source": [
    "import pandas as pd\n",
    "import numpy as np\n",
    "import matplotlib.pyplot as plt\n",
    "import matplotlib\n",
    "import math\n",
    "import os\n",
    "from math import sqrt \n",
    "\n",
    "#functions to handle, analyze, plot\n",
    "def calcTotal(all): \n",
    "    bitKeys = list(filter(lambda x: \"BitString\" in x, all.columns.values))\n",
    "    all['Total_Covered_Goals'] = 0\n",
    "    all['Total_All_Goals'] = 0\n",
    "    newKeys = ['Total_Covered_Goals', 'Size', 'Length']\n",
    "    for bk in bitKeys:\n",
    "        newKey = bk.replace('BitString', 'Goals')\n",
    "        ak = newKey + 'All'\n",
    "        all[newKey] = all[bk].map(lambda c:str(c).count('1'))\n",
    "        all[ak] = all[bk].map(lambda c:len(str(c)))\n",
    "        all['Total_Covered_Goals'] += all[newKey]\n",
    "        all['Total_All_Goals'] += all[ak]\n",
    "        newKeys.append(newKey)\n",
    "    return all, newKeys\n",
    "def ana(s, key='Total_Covered_Goals'):\n",
    "    mean_covered_goals = s.mean()[key]\n",
    "    std_covered_goals = s.std()[key]\n",
    "    max_covered_goals = s.max()[key]\n",
    "    min_covered_goals = s.min()[key]\n",
    "    return mean_covered_goals,std_covered_goals,max_covered_goals,min_covered_goals\n",
    "def total(*sl):\n",
    "    \"\"\"Sum the maximum and the mean of 'Total_Covered_Goals' over all given frames.\n",
    "\n",
    "    Returns (sum of maxima, sum of means).\n",
    "    \"\"\"\n",
    "    max_sum = 0\n",
    "    mean_sum = 0\n",
    "    for frame in sl:\n",
    "        frame_mean, _, frame_max, _ = ana(frame)\n",
    "        max_sum = max_sum + frame_max\n",
    "        mean_sum = mean_sum + frame_mean\n",
    "    return max_sum, mean_sum\n",
    "def compareSingle(n, m):\n",
    "    if n > m:\n",
    "        return 1\n",
    "    elif n == m:\n",
    "        return 0.5\n",
    "    return 0\n",
    "def get_single_a12_function(series):\n",
    "    \"\"\"Build a function that scores one value against every element of `series`.\n",
    "\n",
    "    The returned callable maps a value n to a Series holding\n",
    "    compareSingle(n, m) for each element m of `series`.\n",
    "    \"\"\"\n",
    "    def score_against_series(n):\n",
    "        return series.map(lambda m: compareSingle(n, m))\n",
    "    return score_against_series\n",
    "def cal_a12(a, b, key='Total_Covered_Goals'):\n",
    "    c = pd.DataFrame({'a':a[key][:], 'b':b[key][:]})\n",
    "    return  c['a'].map(get_single_a12_function(c['b'])).sum().sum()/(a[key].count()*b[key].count())\n",
    "\n",
    "from scipy import stats\n",
    "def greater_possibility(a, b):\n",
    "    ic = 0\n",
    "    for ia in a:\n",
    "        for ib in b:\n",
    "            if ia > ib:\n",
    "                ic = ic + 1\n",
    "    return ic / float(len(a) *len(b))\n",
    "def real_mu(a, b):\n",
    "    \"\"\"Share of pairs where a wins minus the share of pairs where b wins.\"\"\"\n",
    "    a_wins = greater_possibility(a, b)\n",
    "    b_wins = greater_possibility(b, a)\n",
    "    return a_wins - b_wins\n",
    "def calc_dij(a, b):\n",
    "    if a > b:\n",
    "        return 1\n",
    "    elif a < b:\n",
    "        return -1\n",
    "    return 0\n",
    "def calc_d(a, b):\n",
    "    \"\"\"Mean pairwise dominance of a over b (average of calc_dij over all pairs).\"\"\"\n",
    "    dominance_sum = sum(calc_dij(x, y) for x in a for y in b)\n",
    "    return dominance_sum / float(len(a) * len(b))\n",
    "def calc_d_i(ia, b):\n",
    "    \"\"\"Mean dominance of the single value ia over every element of b.\"\"\"\n",
    "    return sum(calc_dij(ia, y) for y in b) / float(len(b))\n",
    "def real_sd_square(a, b):\n",
    "    t = 0\n",
    "    d = calc_d(a, b)\n",
    "    \n",
    "    for ia in a:\n",
    "        t += len(b) ** 2 * ((calc_d_i(ia, b) - d) ** 2)\n",
    "    for ib in b:  \n",
    "        t += len(a) ** 2 * (((0 - calc_d_i(ib, a)) - d) ** 2)\n",
    "    for ia in a:\n",
    "        for ib in b:\n",
    "            t += ((calc_dij(ia, ib) - d) ** 2)\n",
    "    if t == 0:\n",
    "        t = 0.0000000000001 # avoid 0\n",
    "    return t / float(len(a) * len(b) * (len(a)-1) * (len(b)-1)  )\n",
    "def calc_t(a, b):\n",
    "    \"\"\"Return d / sd for a versus b, or -1 when the standard deviation is zero.\n",
    "\n",
    "    d comes from calc_d and sd is the square root of real_sd_square.\n",
    "    \"\"\"\n",
    "    sd = math.sqrt(real_sd_square(a, b))\n",
    "    return -1 if sd == 0 else calc_d(a, b) / sd\n",
    "def calc_t_greater(a, b):\n",
    "    \"\"\"Return (d - 1) / sd for a versus b, with d from calc_d and sd from real_sd_square.\"\"\"\n",
    "    variance = real_sd_square(a, b)\n",
    "    return (calc_d(a, b) - 1) / math.sqrt(variance)\n",
    "def get_a_b(a, b, key):\n",
    "    return a[key], b[key]\n",
    "def get_z(all_data, a1, a2, key):\n",
    "    \"\"\"calc_t statistic of all_data[a1][key] versus all_data[a2][key].\n",
    "\n",
    "    calc_t signals a zero standard deviation with -1; in that case a message\n",
    "    is printed and 0 is returned instead.\n",
    "    \"\"\"\n",
    "    sample_a, sample_b = get_a_b(all_data[a1], all_data[a2], key)\n",
    "    statistic = calc_t(sample_a, sample_b)\n",
    "    if statistic != -1:\n",
    "        return statistic\n",
    "    print('something wrong with', a1, a2, key)\n",
    "    return 0\n",
    "def get_a12_stat(dft, all_data, remove_nom=False, zp = 1.8409): # p=0.05\n",
    "    ret = {}\n",
    "    dret = {}\n",
    "    nk = list(all_data.keys())\n",
    "    for index, v in dft.items():\n",
    "        dret[index] = {}\n",
    "        for vi, vv in v.iterrows():\n",
    "            if not ret.__contains__(vi):\n",
    "                ret[vi]  = 0\n",
    "            if not dret[index].__contains__(vi):\n",
    "                dret[index][vi]  = 0\n",
    "            for ag in nk:\n",
    "                if vv[ag] > 0.5:\n",
    "                    if remove_nom and get_z(all_data, vi, ag, index) < zp:\n",
    "                        continue\n",
    "                    ret[vi] = ret[vi] + 1\n",
    "                    dret[index][vi] = dret[index][vi] + 1\n",
    "    return ret, dret\n",
    "\n",
    "def calcResult(normal, deep, newKeys, nn='normal', dn='deep'): \n",
    "    \"\"\"Compare `deep` against `normal` on every metric in newKeys.\n",
    "\n",
    "    Returns one row per metric with the columns 'type' (metric name),\n",
    "    '{dn}_avg', '{nn}_avg', 'a12' (cal_a12 of deep over normal), 'increase'\n",
    "    (change of the deep mean relative to the normal mean) and 'p_value<=0.05'\n",
    "    (whether calc_t of deep versus normal exceeds 1.8409).\n",
    "    \"\"\"\n",
    "    nn = str(nn)\n",
    "    dn = str(dn)\n",
    "    column_names = ('type', dn + '_avg', nn + '_avg', 'a12', 'increase', 'p_value<=0.05')\n",
    "    result = pd.DataFrame(columns=column_names)\n",
    "    for row_number, metric in enumerate(newKeys):\n",
    "        deep_avg = ana(deep, metric)[0]\n",
    "        normal_avg = ana(normal, metric)[0]\n",
    "        effect_size = cal_a12(deep, normal, metric)\n",
    "        deep_sample, normal_sample = get_a_b(deep, normal, metric)\n",
    "        increase = (deep_avg - normal_avg) / normal_avg\n",
    "        significant = calc_t(deep_sample, normal_sample) > 1.8409\n",
    "        result.loc[row_number] = [metric, deep_avg, normal_avg, effect_size, increase, significant]\n",
    "    return result\n",
    "def get_part(mne_all, number=30, delete_number = 30):\n",
    "    mne_deep = mne_all.head(number)\n",
    "    mne_all = mne_all.drop(list(range(delete_number))).reset_index(drop=True)\n",
    "    return mne_deep,mne_all\n",
    "def get_all(file):\n",
    "    \"\"\"Read a CSV file and pass it through calcTotal.\n",
    "\n",
    "    Returns what calcTotal returns: the enriched frame and its list of metric names.\n",
    "    \"\"\"\n",
    "    return calcTotal(pd.read_csv(file))\n",
    "def get_data_matrix(group, newKeys):\n",
    "    \"\"\"Build the pairwise comparison tables for every pair of groups.\n",
    "\n",
    "    Parameters\n",
    "    ----------\n",
    "    group : dict mapping a group name to its DataFrame.\n",
    "    newKeys : metric column names to compare.\n",
    "\n",
    "    Returns\n",
    "    -------\n",
    "    a : a[x][y] is the calcResult table with deep=group[x] and normal=group[y],\n",
    "        indexed by metric name.\n",
    "    dft : dft[metric] is a DataFrame with one row per group holding the 'mean'\n",
    "        and 'std' of the metric followed by one column per group with the a12\n",
    "        value of the row group over the column group; rows are sorted by mean,\n",
    "        largest first.\n",
    "    \"\"\"\n",
    "    names = sorted(group.keys())\n",
    "\n",
    "    a = {}\n",
    "    for deep_name in names:\n",
    "        a[deep_name] = {}\n",
    "        for normal_name in names:\n",
    "            # pass the labels in the same order as the frames (nn = normal, dn = deep)\n",
    "            # so the '{name}_avg' columns carry the name of the group they describe\n",
    "            table = calcResult(group[normal_name], group[deep_name], newKeys, normal_name, deep_name)\n",
    "            table.index = table['type']\n",
    "            del table['type']\n",
    "            a[deep_name][normal_name] = table\n",
    "    dft = {}\n",
    "    for metric in newKeys:\n",
    "        by_deep = pd.DataFrame([], index=names)\n",
    "        for deep_name in names:\n",
    "            by_deep[deep_name] = pd.Series({other: a[deep_name][other]['a12'][metric] for other in names})\n",
    "        # after the transpose: rows = deep group, columns = normal group\n",
    "        matrix = by_deep.T\n",
    "        row_names = list(matrix.index)\n",
    "        # aggregate the metric column only; whole-frame mean()/std() fails on text columns\n",
    "        matrix.insert(0, 'std', [group[name][metric].std() for name in row_names])\n",
    "        matrix.insert(0, 'mean', [group[name][metric].mean() for name in row_names])\n",
    "        dft[metric] = matrix.sort_values(by='mean', ascending=False)\n",
    "    return a, dft\n",
    "def draw_time_plot(group, newKeys, sg = None, sl = 10):\n",
    "    \"\"\"Plot the mean of each metric against the sorted keys of `group`, one subplot per metric.\n",
    "\n",
    "    group maps a sortable key to a DataFrame. With sg=None every metric is\n",
    "    drawn as a single line. Otherwise the sorted keys are cut into len(sg)\n",
    "    consecutive slices of sl keys; each slice is drawn as its own line labelled\n",
    "    '{sg[j]}_{metric}' with its x values shifted back by the slice start.\n",
    "    \"\"\"\n",
    "    means = {}\n",
    "    for metric in newKeys:\n",
    "        means[metric] = {w: ana(frame, metric)[0] for w, frame in group.items()}\n",
    "    fig = plt.figure(figsize=(20,9))\n",
    "    for position, metric in enumerate(newKeys, start=1):\n",
    "        axis = fig.add_subplot(math.ceil(len(newKeys)/2), 2, position)\n",
    "        keys = sorted(means[metric].keys())\n",
    "        values = [means[metric][k] for k in keys]\n",
    "        if sg is None:\n",
    "            axis.plot(keys, values, label=metric)\n",
    "        else:\n",
    "            for j in range(len(sg)):\n",
    "                start = j * sl\n",
    "                stop = start + sl\n",
    "                shifted_keys = [k - start for k in keys[start:stop]]\n",
    "                axis.plot(shifted_keys, values[start:stop], label=sg[j] + '_' + metric)\n",
    "        axis.legend(loc='best')\n",
    "\n",
    "def read_exp_data(exp_dir):\n",
    "    fs =  os.listdir(exp_dir)\n",
    "    ds = {}\n",
    "    for f in fs:\n",
    "        sf=os.path.join(exp_dir,f)\n",
    "        if not os.path.isdir(sf):\n",
    "            continue\n",
    "        if not f.startswith(\"task-\"):\n",
    "            continue\n",
    "        f_parts = f.split(\"-\")\n",
    "        ds[int(f_parts[1])] = sf\n",
    "    tasks = {}\n",
    "    for task_id, d in ds.items():\n",
    "        results_dir = os.path.join(d, \"results\")\n",
    "        rs = os.listdir(results_dir)\n",
    "        task_name = \"\"\n",
    "        for r in rs:\n",
    "            rf = os.path.join(results_dir, r)\n",
    "            if os.path.isdir(rf):\n",
    "                task_name = r\n",
    "                break\n",
    "        tasks[task_id] = {\"name\":task_name, \"classes\":[], \"data\":{}}\n",
    "        task_dir = os.path.join(d, \"results\", task_name)\n",
    "        projects = os.listdir(task_dir)\n",
    "        for project in projects:\n",
    "            pf = os.path.join(task_dir, project)\n",
    "            if not os.path.isdir(pf):\n",
    "                continue\n",
    "            cs = os.listdir(pf)\n",
    "            for single_class in cs:\n",
    "                cf = os.path.join(pf, single_class)\n",
    "                if not os.path.isdir(cf):\n",
    "                    continue\n",
    "                reports_dir = os.path.join(cf, \"reports\")\n",
    "                if not os.path.isdir(reports_dir):\n",
    "                    print(\"no report dir:\", reports_dir)\n",
    "                    continue\n",
    "                tasks[task_id]['classes'].append([project, single_class])\n",
    "                reports = os.listdir(reports_dir)\n",
    "                task_data = None\n",
    "                for single_report in reports:\n",
    "                    if not single_report.isnumeric():\n",
    "                        continue\n",
    "                    statistics_file = os.path.join(reports_dir, single_report, \"statistics.csv\")\n",
    "                    if not os.path.isfile(statistics_file):\n",
    "                         print(\"no statistics.csv:\", statistics_file)\n",
    "                         continue\n",
    "                    single_data = pd.read_csv(statistics_file)\n",
    "                    if task_data is None:\n",
    "                        task_data = single_data\n",
    "                    else:\n",
    "                        task_data = pd.concat([task_data, single_data],axis=0,ignore_index=True)\n",
    "                tasks[task_id]['data'][single_class] = task_data\n",
    "    return tasks\n",
    "\n",
    "def remain_part(data, remain_total):\n",
    "    \"\"\"Cut the leading surplus rows off every per-class frame.\n",
    "\n",
    "    A frame whose non-null 'TARGET_CLASS' count exceeds remain_total loses that\n",
    "    many leading rows minus remain_total (via get_part); other frames are left\n",
    "    alone. `data` is updated in place and also returned.\n",
    "    \"\"\"\n",
    "    for task_id, task in data.items():\n",
    "        for class_name, frame in task['data'].items():\n",
    "            surplus = frame.count()['TARGET_CLASS'] - remain_total\n",
    "            if surplus > 0:\n",
    "                data[task_id]['data'][class_name] = get_part(frame, surplus, surplus)[1]\n",
    "    return data\n",
    "\n",
    "def calc_class_info(all): \n",
    "    bitKeys = list(filter(lambda x: \"BitString\" in x, all.columns.values))\n",
    "    all['Total_All_Goals'] = 0\n",
    "    newKeys = ['Total_All_Goals']\n",
    "    for bk in bitKeys:\n",
    "        newKey = bk.replace('BitString', '')\n",
    "        all[newKey] = all[bk].map(lambda c:len(str(c)))\n",
    "        all['Total_All_Goals'] += all[newKey]\n",
    "        newKeys.append(newKey)\n",
    "    return all, newKeys\n",
    "\n",
    "def get_all_class_info(origin_hadoop_data):\n",
    "    \"\"\"Stack the calc_class_info result of every class of every task into one frame.\n",
    "\n",
    "    origin_hadoop_data has the shape returned by read_exp_data: each value has\n",
    "    a 'data' dict mapping a class name to its DataFrame.\n",
    "\n",
    "    Returns (stat_map, nk). stat_map is None when there is no class data, the\n",
    "    single frame itself when there is exactly one, otherwise the row-wise\n",
    "    concatenation with a fresh index. nk is the key list returned by the last\n",
    "    calc_class_info call (None if it was never called).\n",
    "    \"\"\"\n",
    "    nk = None\n",
    "    class_frames = []\n",
    "    for task in origin_hadoop_data.values():\n",
    "        for data in task['data'].values():\n",
    "            class_frame, nk = calc_class_info(data)\n",
    "            class_frames.append(class_frame)\n",
    "    if not class_frames:\n",
    "        return None, nk\n",
    "    if len(class_frames) == 1:\n",
    "        return class_frames[0], nk\n",
    "    # one concat for all classes instead of copying the accumulated frame each time\n",
    "    return pd.concat(class_frames, axis=0, ignore_index=True), nk\n",
    "def get_dir_class_info(path):\n",
    "    \"\"\"Read an experiment directory, trim each class frame with remain_part(..., 1)\n",
    "    and collect the class information.\n",
    "\n",
    "    Returns the (stat_map, nk) pair produced by get_all_class_info.\n",
    "    \"\"\"\n",
    "    trimmed = remain_part(read_exp_data(path), 1)\n",
    "    return get_all_class_info(trimmed)\n",
    "def save_class_info(stat_map, nk, project, name):\n",
    "    stat_map['project'] = project\n",
    "    columns = ['project', 'TARGET_CLASS']\n",
    "    columns.extend(nk)\n",
    "    header = ['project', 'class']\n",
    "    header.extend(nk)\n",
    "    stat_map.to_csv('%s.csv'%name, index=False, columns=columns, header=header)\n",
    "def get_better_cases(show_df, key, a, b):\n",
    "    r =  show_df[(show_df['%s-%s'%(key, a)] > show_df['%s-%s' % (key, b)]) & (show_df['%s-p-%s-%s'%(key, a, b)] > 0.5) & (show_df['%s-s-%s-%s'%(key, a, b)] == 1.0)]\n",
    "    r = r[['class_name', \"%s-%s\" %(key, a), \"%s-%s\" % (key, b), '%s-p-%s-%s'%(key, a, b), '%s-s-%s-%s'%(key, a, b)]]\n",
    "    r[\"diff\"] = r['%s-%s'%(key, a)] - r['%s-%s' % (key, b)]\n",
    "    r[\"diff_rate\"] = r[\"diff\"] / (r['%s-%s' % (key, b)])\n",
    "    r[r[\"diff_rate\"]==float(\"inf\")]['diff_rate'] = np.nan\n",
    "    return r\n",
    "def get_better_multi(show_df, nk, a, b):\n",
    "    \"\"\"Run get_better_cases in both directions for every metric and count the results.\n",
    "\n",
    "    Returns (result, df):\n",
    "    result[key][a] / result[key][b] are the frames of classes where a (resp. b)\n",
    "    is significantly better on metric `key`;\n",
    "    df has one row per metric with the columns 'key', a, b (numbers of such\n",
    "    classes) and 'non-significant' (remaining classes of show_df). With an\n",
    "    empty nk, df is empty and has only the columns 'key', a and b.\n",
    "\n",
    "    The summary is built from a list of records because DataFrame.append is\n",
    "    not available in current pandas releases.\n",
    "    \"\"\"\n",
    "    result = {}\n",
    "    records = []\n",
    "    for key in nk:\n",
    "        a_better = get_better_cases(show_df, key, a, b)\n",
    "        b_better = get_better_cases(show_df, key, b, a)\n",
    "        result[key] = {a: a_better, b: b_better}\n",
    "        a_count = a_better.count()['class_name']\n",
    "        b_count = b_better.count()['class_name']\n",
    "        records.append({'key': key, a: a_count, b: b_count,\n",
    "                        'non-significant': show_df.count()['class_name'] - a_count - b_count})\n",
    "    if records:\n",
    "        df = pd.DataFrame(records, columns=['key', a, b, 'non-significant'])\n",
    "    else:\n",
    "        df = pd.DataFrame({'key': [], a: [], b: []})\n",
    "    return result, df\n",
    "def get_compare_data(groups, class_groups=None):\n",
    "    \"\"\"Compare two configurations class by class.\n",
    "\n",
    "    Parameters\n",
    "    ----------\n",
    "    groups : dict with exactly two entries, configuration name -> short label.\n",
    "    class_groups : dict class name -> {configuration name -> task dict whose\n",
    "        'data' maps the class name to a DataFrame}. Defaults to the notebook\n",
    "        global data_group_by_class, which is what the function always used.\n",
    "\n",
    "    Only classes that have data for both configurations are compared.\n",
    "\n",
    "    Returns\n",
    "    -------\n",
    "    (nk, show_df, ba, bd), or (False, False, False, False) when `groups` does\n",
    "    not have two entries or no class has both configurations.\n",
    "    nk : metric names returned by calcTotal.\n",
    "    show_df : one row per class with 'class_name', '{metric}-{label}' (mean),\n",
    "        '{metric}-p-{v}-{v1}' (a12 of v over v1) and '{metric}-s-{v}-{v1}'\n",
    "        (True when that a12 is above 0.5 and get_z exceeds 1.8409).\n",
    "    ba, bd : the two results of get_better_multi for the two labels.\n",
    "    \"\"\"\n",
    "    if len(groups) != 2:\n",
    "        return False, False, False, False\n",
    "    if class_groups is None:\n",
    "        class_groups = data_group_by_class\n",
    "    stat_map = {}\n",
    "    nk = None\n",
    "    for class_name, by_config in class_groups.items():\n",
    "        if not all(config in by_config for config in groups):\n",
    "            continue\n",
    "        class_data_map = {}\n",
    "        for config, single_data in by_config.items():\n",
    "            if config in groups:\n",
    "                class_data_map[groups[config]], nk = calcTotal(single_data['data'][class_name])\n",
    "        a, dft = get_data_matrix(class_data_map, nk)\n",
    "        stat_map[class_name] = [a, dft, class_data_map]\n",
    "    if not stat_map:\n",
    "        # nothing to compare: use the same failure value as the check above\n",
    "        return False, False, False, False\n",
    "\n",
    "    labels = list(groups.values())\n",
    "    base_columns = ['class_name'] + ['%s-%s' % (k, v) for k in nk for v in labels]\n",
    "    rows = []\n",
    "    for class_name, (_, dft, class_data_map) in stat_map.items():\n",
    "        row = {'class_name': class_name}\n",
    "        for k in nk:\n",
    "            for v in labels:\n",
    "                row['%s-%s' % (k, v)] = dft[k]['mean'][v]\n",
    "                for v1 in labels:\n",
    "                    if v == v1:\n",
    "                        continue\n",
    "                    p = dft[k][v1][v]\n",
    "                    if p > 0.5:\n",
    "                        r = get_z(class_data_map, v, v1, k) > 1.8409\n",
    "                    else:\n",
    "                        r = False\n",
    "                    row['%s-p-%s-%s' % (k, v, v1)] = p\n",
    "                    row['%s-s-%s-%s' % (k, v, v1)] = r\n",
    "        rows.append(row)\n",
    "    # build the frame once (DataFrame.append is gone from current pandas) and keep\n",
    "    # the mean columns in front, as the row-by-row construction did\n",
    "    show_df = pd.DataFrame(rows)\n",
    "    extra_columns = [col for col in show_df.columns if col not in base_columns]\n",
    "    show_df = show_df[base_columns + extra_columns]\n",
    "    # summarise once, after all classes are collected\n",
    "    ba, bd = get_better_multi(show_df, nk, labels[0], labels[1])\n",
    "    return nk, show_df, ba, bd\n",
    "def get_mean_data_of_diff_rate(d):\n",
    "    return d.describe().transpose()[[\"mean\"]]\n",
    "def get_stat_data_of_diff_rate(d):\n",
    "    return d.describe().transpose()[[\"mean\",\"50%\",\"max\"]]\n",
    "def get_diff_rate_data(all_data, keys, config_name):\n",
    "    data = None\n",
    "    ck = []\n",
    "    for key in keys:\n",
    "        tmp = all_data[key][config_name][[\"class_name\", \"diff_rate\"]]\n",
    "        new_key = key\n",
    "        dk = \"%s_diff_rate\" % new_key\n",
    "        ck.append(dk)\n",
    "        tmp = tmp.rename(columns={\"diff_rate\":dk})\n",
    "        if data is None:\n",
    "            data = tmp\n",
    "        else:\n",
    "           data = pd.merge(data, tmp, on='class_name', how='outer')\n",
    "    return data[ck]\n",
    "\n",
    "# Colour that format_axes applies to the left/bottom spines and to the tick marks.\n",
    "SPINE_COLOR = 'gray'\n",
    "\n",
    "def latexify(fig_width=None, fig_height=None, columns=1):\n",
    "    \"\"\"Set up matplotlib's RC params for LaTeX plotting.\n",
    "    Call this before plotting a figure.\n",
    "    Parameters\n",
    "    ----------\n",
    "    fig_width : float, optional, inches\n",
    "        Defaults to 3.39 for one column and 6.9 for two.\n",
    "    fig_height : float,  optional, inches\n",
    "        Defaults to fig_width times (sqrt(5)-1)/2; values above 8.0 are\n",
    "        reduced to 8.0 after printing a warning.\n",
    "    columns : {1, 2}\n",
    "    \"\"\"\n",
    "\n",
    "    # code adapted from http://www.scipy.org/Cookbook/Matplotlib/LaTeX_Examples\n",
    "\n",
    "    # Width and max height in inches for IEEE journals taken from\n",
    "    # computer.org/cms/Computer.org/Journal%20templates/transactions_art_guide.pdf\n",
    "\n",
    "    assert(columns in [1,2])\n",
    "\n",
    "    if fig_width is None:\n",
    "        fig_width = 3.39 if columns==1 else 6.9 # width in inches\n",
    "\n",
    "    if fig_height is None:\n",
    "        golden_mean = (sqrt(5)-1.0)/2.0    # Aesthetic ratio\n",
    "        fig_height = fig_width*golden_mean # height in inches\n",
    "\n",
    "    MAX_HEIGHT_INCHES = 8.0\n",
    "    if fig_height > MAX_HEIGHT_INCHES:\n",
    "        # format the numbers: concatenating str and float raises TypeError\n",
    "        print('WARNING: fig_height too large: %s so will reduce to %s inches.'\n",
    "              % (fig_height, MAX_HEIGHT_INCHES))\n",
    "        fig_height = MAX_HEIGHT_INCHES\n",
    "\n",
    "    # NB (bart): default font-size in latex is 11. This should exactly match \n",
    "    # the font size in the text if the figwidth is set appropriately.\n",
    "    # Note that this does not hold if you put two figures next to each other using\n",
    "    # minipage. You need to use subplots.\n",
    "    params = {'backend': 'ps',\n",
    "              # a plain string: current matplotlib no longer accepts a list here\n",
    "              'text.latex.preamble': '\\\\usepackage{gensymb}',\n",
    "              'axes.labelsize': 8, # fontsize for x and y labels (was 12 and before 10)\n",
    "              'axes.titlesize': 8,\n",
    "              'font.size': 8, # was 12 and before 10\n",
    "              'legend.fontsize': 8, # was 12 and before 10\n",
    "              'xtick.labelsize': 8,\n",
    "              'ytick.labelsize': 8,\n",
    "              'text.usetex': True,\n",
    "              'figure.figsize': [fig_width,fig_height],\n",
    "              'font.family': 'serif'\n",
    "    }\n",
    "\n",
    "    matplotlib.rcParams.update(params)\n",
    "\n",
    "\n",
    "def format_axes(ax):\n",
    "    \"\"\"Restyle an axes object and return it.\n",
    "\n",
    "    The top and right spines are hidden, the left and bottom spines are drawn\n",
    "    in SPINE_COLOR with a line width of 0.5, ticks are placed on the bottom\n",
    "    and left only and point outwards in SPINE_COLOR.\n",
    "    \"\"\"\n",
    "    for hidden in ('top', 'right'):\n",
    "        ax.spines[hidden].set_visible(False)\n",
    "\n",
    "    for shown in ('left', 'bottom'):\n",
    "        visible_spine = ax.spines[shown]\n",
    "        visible_spine.set_color(SPINE_COLOR)\n",
    "        visible_spine.set_linewidth(0.5)\n",
    "\n",
    "    ax.xaxis.set_ticks_position('bottom')\n",
    "    ax.yaxis.set_ticks_position('left')\n",
    "\n",
    "    for axis in (ax.xaxis, ax.yaxis):\n",
    "        axis.set_tick_params(direction='out', color=SPINE_COLOR)\n",
    "\n",
    "    return ax"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Base directory for the experiment folders: the current working directory, as a relative path.\n",
    "current_path = os.path.relpath(\".\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Load every task found in the 'exp-33-set-cover-2' folder below current_path.\n",
    "exp_dir = os.path.join(current_path,  \"exp-33-set-cover-2\")\n",
    "data = read_exp_data(exp_dir)\n",
    "# Class frames holding more than remain_total rows lose their leading surplus rows.\n",
    "remain_total = 30\n",
    "data = remain_part(data, remain_total)\n",
    "classes = ['org_apache_hadoop_yarn_nodelabels_CommonNodeLabelsManager', 'org_apache_hadoop_io_MapFile', 'org_apache_hadoop_ipc_Client', 'org_apache_hadoop_mapreduce_jobhistory_JobHistoryEventHandler', 'org_apache_hadoop_yarn_server_metrics_AMRMClientRelayerMetrics', 'org_apache_hadoop_thirdparty_protobuf_Value', 'org_apache_hadoop_security_token_delegation_AbstractDelegationTokenSecretManager', 'org_apache_hadoop_fs_shell_Ls', 'org_apache_hadoop_mapred_QueueConfigurationParser', 'org_apache_hadoop_yarn_api_records_impl_pb_NodeReportPBImpl', 'org_apache_hadoop_yarn_proto_YarnSecurityTokenProtos', 'org_apache_hadoop_net_NetworkTopology', 'org_apache_hadoop_mapreduce_v2_proto_MRServiceProtos', 'org_apache_hadoop_yarn_proto_YarnServerNodemanagerServiceProtos', 'org_apache_hadoop_thirdparty_com_google_common_reflect_TypeToken', 'org_apache_hadoop_mapreduce_jobhistory_HistoryViewer', 'org_apache_hadoop_mapreduce_lib_input_CombineFileInputFormat', 'org_apache_hadoop_thirdparty_org_checkerframework_checker_i18nformatter_I18nFormatUtil', 'org_apache_hadoop_thirdparty_protobuf_AbstractMessage', 'org_apache_hadoop_metrics2_sink_RollingFileSystemSink']\n",
    "# Group the tasks as data_group_by_class[class name][task name] = task.\n",
    "# Only the first [project, class] entry of each task is looked at, and only\n",
    "# classes listed in `classes` are kept. get_compare_data reads this dictionary.\n",
    "data_group_by_class = {}\n",
    "for task_id, single in data.items():\n",
    "    if single['classes'][0][1] not in classes:\n",
    "        continue\n",
    "    if not single['classes'][0][1] in data_group_by_class:\n",
    "        data_group_by_class[single['classes'][0][1]] = {}\n",
    "    data_group_by_class[single['classes'][0][1]][single['name']] = single"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/Users/zhouzhichao/ml/my_env/lib/python3.7/site-packages/ipykernel_launcher.py:139: RuntimeWarning: invalid value encountered in double_scalars\n"
     ]
    }
   ],
   "source": [
    "# Compare the tasks named 'origin-dynamosa-1.2.0' (label 'origin') and 'sc-dynamosa-set-cover' (label 'sc').\n",
    "dnk,dshow_df,da,dd =  get_compare_data({'origin-dynamosa-1.2.0':\"origin\", 'sc-dynamosa-set-cover':\"sc\"})"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Per-metric diff_rate columns of the classes stored under each label in `da`.\n",
    "da_sc_diff_rate = get_diff_rate_data(da, dnk, \"sc\")\n",
    "da_origin_diff_rate = get_diff_rate_data(da, dnk, \"origin\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Keep only the first two rows of the summary table.\n",
    "# NOTE(review): presumably the rows for the first two metrics in dnk - confirm.\n",
    "dd = dd[0:2]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "<AxesSubplot:xlabel='key'>"
      ]
     },
     "execution_count": 7,
     "metadata": {},
     "output_type": "execute_result"
    },
    {
     "data": {
      "image/png": "iVBORw0KGgoAAAANSUhEUgAAAlkAAAKpCAYAAACPajleAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjQuMSwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy/Z1A+gAAAACXBIWXMAAAsTAAALEwEAmpwYAAApx0lEQVR4nO3de7SddX3v+8/XBAUBASXFC8Rgt6JIIMQQqQLiDRUVKhtPpVqRraa68dZ2u/Wc4wCLw721spXjpbIRQbwhWqTSihe0pYLDIiFAkIsbxVSCVhARCJehwd/5IzPpIqxFwlrrl7nm8vUaY43M5zLn/EYYyzfP88xnVmstAABMr4cMewAAgNlIZAEAdCCyAAA6EFkAAB2ILACADkQWAEAHc4c9wHh23nnntmDBgmGPAQCwSZdeeukvW2vzNl4/IyNrwYIFWb58+bDHAADYpKr6t/HWO10IANCByAIA6EBkAQB0ILIAADoQWQAAHYgsAIAORBYAQAciCwCgA5EFANCByAIA6EBkAQB0ILIAADoQWQAAHYgsAIAORBYAQAciCwCgA5EFANCByAIA6EBkAQB0ILIAADrYZGRV1W5V9c9VdXVVXVVVbx2sf2RVnV9V1w3+3GmC5x892Oe6qjp6uv8CAAAz0eYcyVqb5K9aa3sm2T/JsVW1Z5J3Jvl2a+2JSb49WL6PqnpkkuOTPD3J0iTHTxRjAACzySYjq7X289baisHjO5Jck+RxSQ5PcsZgtzOS/PE4T39BkvNba79qrd2a5PwkL5yGuQEAZrQHdU1WVS1Ism+Si5Ps0lr7+WDTvyfZZZynPC7JDWOWVw/WAQDManM3d8eq2i7J2Une1lq7vao2bGuttapqUxmkqpYlWZYk8+fPn8pLzUoLz1g47BEYIVcefeWwR2BE+N3Cg+F3y4OzWUeyqmqrrAusz7XWvjxY/Yuqesxg+2OS3DTOU29MstuY5V0H6+6ntXZKa21Ja23JvHnzNnd+AIAZaXM+XVhJPpnkmtbaB8dsOjfJ+k8LHp3kK+M8/RtJDqmqnQYXvB8yWAcAMKttzpGsZyb5syTPqarLBz+HJnlfkudX1XVJnjdYTlUtqapTk6S19qsk70lyyeDnhME6AIBZbZPXZLXWLkpSE2x+7jj7L0/yujHLpyU5bbIDAgCMInd8BwDoQGQBAHQgsgAAOhBZAAAdiCwAgA5EFgBAByILAKADkQUA0IHIAgDoQGQBAHQgsgAAOhBZAAAdiCwAgA5EFgBAByILAKADkQUA0IHIAgDoQGQBAHQgsgAAOhBZAAAdiCwAgA5EFgBAByILAKADkQUA0IHIAgDoQGQBAHQgsgAAOhBZAAAdiCwAgA5EFgBAByILAKADkQUA0IHIAgDoQGQBAHQgsgAAOhBZAAAdiCwAgA5EFgBAByILAKADkQUA0IHIAgDoQGQBAHQgsgAAOhBZAAAdiCwAgA5EFgBAByILAKADkQUA0IHIAgDoQGQBAHQgsgAAOhBZAAAdiCwAgA5EFgBAByILAKADkQUA0MHcTe1QVacleUmSm1prew3WnZVkj8EuOyb5dWtt0TjPXZXkjiT3JlnbWlsyLVMDAMxwm4ysJJ9K8tEkn16/orX2J+sfV9X/SnLbAzz/2a21X052QACAUbTJyGqtfaeqFoy3raoqyf+V5DnTPBcAwEib6jVZByb5RWvtugm2tyTfrKpLq2rZA71QVS2rquVVtfzmm2+e4lgAAMM11cg6KsmZD7D9gNba4iQvSnJsVR000Y6ttVNaa0taa0vmzZs3xbEAAIZr0pFVVXOTHJHkrIn2aa3dOPjzpiTnJFk62fcDABglUzmS9bwk17bWVo+3saq2rart1z9OckiSH0zh/QAARsYmI6uqzkzyvSR7VNXqqnrtYNMrstGpwqp6bFWdN1jcJclFVXVFku8n+Wpr7evTNzoAwMy1OZ8uPGqC9a8ZZ93Pkhw6eHx9kn2mOB8A
wEhyx3cAgA5EFgBAByILAKADkQUA0IHIAgDoQGQBAHQgsgAAOhBZAAAdiCwAgA5EFgBAByILAKADkQUA0IHIAgDoQGQBAHQgsgAAOhBZAAAdiCwAgA5EFgBAByILAKADkQUA0IHIAgDoQGQBAHQgsgAAOhBZAAAdiCwAgA5EFgBAByILAKADkQUA0IHIAgDoQGQBAHQgsgAAOhBZAAAdiCwAgA5EFgBAByILAKADkQUA0IHIAgDoQGQBAHQgsgAAOhBZAAAdiCwAgA5EFgBAByILAKADkQUA0IHIAgDoQGQBAHQgsgAAOhBZAAAdiCwAgA5EFgBAByILAKADkQUA0IHIAgDoQGQBAHQgsgAAOhBZAAAdbDKyquq0qrqpqn4wZt27q+rGqrp88HPoBM99YVX9sKp+VFXvnM7BAQBmss05kvWpJC8cZ/2HWmuLBj/nbbyxquYk+ViSFyXZM8lRVbXnVIYFABgVm4ys1tp3kvxqEq+9NMmPWmvXt9Z+k+QLSQ6fxOsAAIycuVN47puq6tVJlif5q9barRttf1ySG8Ysr07y9IlerKqWJVmWJPPnz5/CWLPTlT/56bBHAAAehMle+P7xJH+YZFGSnyf5X1MdpLV2SmttSWttybx586b6cgAAQzWpyGqt/aK1dm9r7XdJPpF1pwY3dmOS3cYs7zpYBwAw600qsqrqMWMWX5bkB+PsdkmSJ1bV7lX10CSvSHLuZN4PAGDUbPKarKo6M8nBSXauqtVJjk9ycFUtStKSrEry54N9H5vk1Nbaoa21tVX1piTfSDInyWmttat6/CUAAGaaTUZWa+2ocVZ/coJ9f5bk0DHL5yW53+0dAABmO3d8BwDoQGQBAHQgsgAAOhBZAAAdiCwAgA5EFgBAByILAKADkQUA0IHIAgDoQGQBAHQgsgAAOhBZAAAdiCwAgA5EFgBAByILAKADkQUA0IHIAgDoQGQBAHQgsgAAOhBZAAAdiCwAgA5EFgBAByILAKADkQUA0IHIAgDoQGQBAHQgsgAAOhBZAAAdiCwAgA5EFgBAByILAKADkQUA0IHIAgDoQGQBAHQgsgAAOhBZAAAdiCwAgA5EFgBAByILAKADkQUA0IHIAgDoQGQBAHQgsgAAOhBZAAAdiCwAgA5EFgBAByILAKADkQUA0IHIAgDoQGQBAHQgsgAAOhBZAAAdiCwAgA5EFgBAByILAKCDTUZWVZ1WVTdV1Q/GrPtAVV1bVSur6pyq2nGC566qqiur6vKqWj6NcwMAzGibcyTrU0leuNG685Ps1VrbO8n/SfJ/P8Dzn91aW9RaWzK5EQEARs8mI6u19p0kv9po3Tdba2sHi/+aZNcOswEAjKzpuCbrvyT52gTbWpJvVtWlVbVsGt4LAGAkzJ3Kk6vq/02yNsnnJtjlgNbajVX1B0nOr6prB0fGxnutZUmWJcn8+fOnMhYAwNBN+khWVb0myUuSvLK11sbbp7V24+DPm5Kck2TpRK/XWjultbaktbZk3rx5kx0LAGBGmFRkVdULk/z3JIe11u6aYJ9tq2r79Y+THJLkB+PtCwAw22zOLRzOTPK9JHtU1eqqem2SjybZPutOAV5eVScP9n1sVZ03eOouSS6qqiuSfD/JV1trX+/ytwAAmGE2eU1Wa+2ocVZ/coJ9f5bk0MHj65PsM6XpAABGlDu+AwB0ILIAADoQWQAAHYgsAIAORBYAQAciCwCgA5EFANCByAIA6EBkAQB0ILIAADoQWQAAHYgsAIAORBYAQAciCwCgA5EFANCByAIA6EBkAQB0ILIAADoQWQAAHYgsAIAORBYAQAciCwCgA5EFANCByAIA6EBkAQB0ILIAADoQWQAAHYgsAIAORBYAQAciCwCgA5EFANCByAIA6EBkAQB0ILIAADoQWQAAHYgsAIAORBYAQAciCwCgA5EFANCByAIA6EBkAQB0ILIAADoQWQAAHYgsAIAORBYAQAciCwCgA5EFANCByAIA6EBkAQB0
ILIAADoQWQAAHYgsAIAORBYAQAciCwCgA5EFANCByAIA6GCzIquqTquqm6rqB2PWPbKqzq+q6wZ/7jTBc48e7HNdVR09XYMDAMxkm3sk61NJXrjRuncm+XZr7YlJvj1Yvo+qemSS45M8PcnSJMdPFGMAALPJZkVWa+07SX610erDk5wxeHxGkj8e56kvSHJ+a+1XrbVbk5yf+8caAMCsM5VrsnZprf188Pjfk+wyzj6PS3LDmOXVg3UAALPa3Ol4kdZaq6o2ldeoqmVJliXJ/Pnzp2OsWWXBPZ8f9giMkFXDHoCRceVPfjrsEWDWmsqRrF9U1WOSZPDnTePsc2OS3cYs7zpYdz+ttVNaa0taa0vmzZs3hbEAAIZvKpF1bpL1nxY8OslXxtnnG0kOqaqdBhe8HzJYBwAwq23uLRzOTPK9JHtU1eqqem2S9yV5flVdl+R5g+VU1ZKqOjVJWmu/SvKeJJcMfk4YrAMAmNU265qs1tpRE2x67jj7Lk/yujHLpyU5bVLTAQCMKHd8BwDoQGQBAHQgsgAAOhBZAAAdiCwAgA5EFgBAByILAKADkQUA0IHIAgDoQGQBAHQgsgAAOhBZAAAdiCwAgA5EFgBAByILAKADkQUA0IHIAgDoQGQBAHQgsgAAOhBZAAAdiCwAgA5EFgBAByILAKADkQUA0IHIAgDoQGQBAHQgsgAAOhBZAAAdiCwAgA5EFgBAByILAKADkQUA0IHIAgDoQGQBAHQgsgAAOhBZAAAdiCwAgA5EFgBAByILAKADkQUA0IHIAgDoQGQBAHQgsgAAOhBZAAAdiCwAgA5EFgBAByILAKADkQUA0IHIAgDoQGQBAHQgsgAAOhBZAAAdiCwAgA5EFgBAByILAKADkQUA0MGkI6uq9qiqy8f83F5Vb9ton4Or6rYx+xw35YkBAEbA3Mk+sbX2wySLkqSq5iS5Mck54+x6YWvtJZN9HwCAUTRdpwufm+THrbV/m6bXAwAYadMVWa9IcuYE2/6oqq6oqq9V1VMneoGqWlZVy6tq+c033zxNYwEADMeUI6uqHprksCRfGmfziiSPb63tk+QjSf5+otdprZ3SWlvSWlsyb968qY4FADBU03Ek60VJVrTWfrHxhtba7a21NYPH5yXZqqp2nob3BACY0aYjso7KBKcKq+rRVVWDx0sH73fLNLwnAMCMNulPFyZJVW2b5PlJ/nzMujckSWvt5CRHJnljVa1NcneSV7TW2lTeEwBgFEwpslprdyZ51EbrTh7z+KNJPjqV9wAAGEXu+A4A0IHIAgDoQGQBAHQgsgAAOhBZAAAdiCwAgA5EFgBAByILAKADkQUA0IHIAgDoQGQBAHQgsgAAOhBZAAAdiCwAgA5EFgBAByILAKADkQUA0IHIAgDoQGQBAHQgsgAAOhBZAAAdiCwAgA5EFgBAByILAKADkQUA0IHIAgDoQGQBAHQgsgAAOhBZAAAdiCwAgA5EFgBAByILAKADkQUA0MHcYQ8AwPAsuOfzwx6BEbJq2AOMGEeyAAA6EFkAAB2ILACADkQWAEAHIgsAoAORBQDQgcgCAOhAZAEAdCCyAAA6EFkAAB2ILACADkQWAEAHIgsAoAORBQDQgcgCAOhAZAEAdCCyAAA6EFkAAB2ILACADkQWAEAHIgsAoIMpR1ZVraqqK6vq8qpaPs72qqoPV9WPqmplVS2e6nsCAMx0c6fpdZ7dWvvlBNtelOSJg5+nJ/n44E8AgFlrS5wuPDzJp9s6/5pkx6p6zBZ4XwCAoZmOyGpJvllVl1bVsnG2Py7JDWOWVw/WAQDMWtNxuvCA1tqNVfUHSc6vqmtba995sC8yCLRlSTJ//vxpGAsAYHimfCSrtXbj4M+bkpyTZOlGu9yYZLcxy7sO1m38Oqe01pa01pbMmzdvqmMBAAzVlCKrqratqu3XP05ySJIfbLTbuUlePfiU4f5Jbmut/Xwq7wsAMNNN9XThLknOqar1r/X51trXq+oNSdJa
OznJeUkOTfKjJHclOWaK7wkAMONNKbJaa9cn2Wec9SePedySHDuV9wEAGDXu+A4A0IHIAgDoQGQBAHQgsgAAOhBZAAAdiCwAgA5EFgBAByILAKADkQUA0IHIAgDoQGQBAHQgsgAAOhBZAAAdiCwAgA5EFgBAByILAKADkQUA0IHIAgDoQGQBAHQgsgAAOhBZAAAdiCwAgA5EFgBAByILAKADkQUA0IHIAgDoQGQBAHQgsgAAOhBZAAAdiCwAgA5EFgBAByILAKADkQUA0IHIAgDoQGQBAHQgsgAAOhBZAAAdiCwAgA5EFgBAByILAKADkQUA0IHIAgDoQGQBAHQgsgAAOhBZAAAdiCwAgA5EFgBAByILAKADkQUA0IHIAgDoQGQBAHQgsgAAOhBZAAAdiCwAgA5EFgBAByILAKCDSUdWVe1WVf9cVVdX1VVV9dZx9jm4qm6rqssHP8dNbVwAgNEwdwrPXZvkr1prK6pq+ySXVtX5rbWrN9rvwtbaS6bwPgAAI2fSR7Jaaz9vra0YPL4jyTVJHjddgwEAjLJpuSarqhYk2TfJxeNs/qOquqKqvlZVT52O9wMAmOmmcrowSVJV2yU5O8nbWmu3b7R5RZLHt9bWVNWhSf4+yRMneJ1lSZYlyfz586c6FgDAUE3pSFZVbZV1gfW51tqXN97eWru9tbZm8Pi8JFtV1c7jvVZr7ZTW2pLW2pJ58+ZNZSwAgKGbyqcLK8knk1zTWvvgBPs8erBfqmrp4P1umex7AgCMiqmcLnxmkj9LcmVVXT5Y9/8kmZ8krbWTkxyZ5I1VtTbJ3Ule0VprU3hPAICRMOnIaq1dlKQ2sc9Hk3x0su8BADCq3PEdAKADkQUA0IHIAgDoQGQBAHQgsgAAOhBZAAAdTPlrdYDZ47e//W1Wr16de+65Z9ij8CBsvfXW2XXXXbPVVlsNexRgDJEFbLB69epsv/32WbBgQQZf1sAM11rLLbfcktWrV2f33Xcf9jjAGE4XAhvcc889edSjHiWwRkhV5VGPepSjjzADiSzgPgTW6PHPDGYmkQUA0IFrsoAJLXjnV6f19Va978XT9lqHHnpoPv/5z2fHHXeccJ/jjjsuBx10UJ73vOdN2/sCbC6RBYyU1lpaaznvvPM2ue8JJ5ywBSYCGJ/ThcCM88EPfjB77bVX9tprr5x00klZtWpV9thjj7z61a/OXnvtlRtuuCELFizIL3/5yyTJe97znuyxxx454IADctRRR+XEE09MkrzmNa/J3/3d3yVJFixYkOOPPz6LFy/OwoULc+211w7t7wf8fnAkC5hRLr300px++um5+OKL01rL05/+9DzrWc/KddddlzPOOCP777//ffa/5JJLcvbZZ+eKK67Ib3/72yxevDhPe9rTxn3tnXfeOStWrMjf/u3f5sQTT8ypp566Jf5KwO8pR7KAGeWiiy7Ky172smy77bbZbrvtcsQRR+TCCy/M4x//+PsFVpJ897vfzeGHH56tt94622+/fV760pdO+NpHHHFEkuRpT3taVq1a1euvAJBEZAEjYtttt53yazzsYQ9LksyZMydr166d8usBPBCRBcwoBx54YP7+7/8+d911V+68886cc845OfDAAyfc/5nPfGb+4R/+Iffcc0/WrFmTf/zHf9yC0wJMzDVZwISm85YLm2vx4sV5zWtek6VLlyZJXve612WnnXaacP/99tsvhx12WPbee+/ssssuWbhwYXbYYYctNS7AhKq1NuwZ7mfJkiVt+fLlwx5jRpnu+xUxu002jq655po85SlPmeZp+luzZk2222673HXXXTnooINyyimnZPHixcMea4ua7D87v1t4MIbxH16joKouba0t2Xi9I1nAyFu2bFmuvvrq3HPPPTn66KN/7wILmJlEFjDyPv/5zw97BID7ceE7AEAHIgsAoAORBQDQgcgCAOjAhe/AxN49zfebevdt0/t6ADOYI1kAAB2ILGBGufPOO/PiF784++yzT/ba
a6+cddZZueSSS/KMZzwj++yzT5YuXZo77rhj2GMCbJLThcCM8vWvfz2Pfexj89WvrrsT+W233ZZ99903Z511Vvbbb7/cfvvt2WabbYY8JcCmOZIFzCgLFy7M+eefn3e84x258MIL89Of/jSPecxjst9++yVJHvGIR2TuXP99CMx8IguYUZ70pCdlxYoVWbhwYd71rnfly1/+8rBHApgUkQXMKD/72c/y8Ic/PK961avy9re/PRdffHF+/vOf55JLLkmS3HHHHVm7du2QpwTYNMfcgYkN4ZYLV155Zd7+9rfnIQ95SLbaaqt8/OMfT2stb37zm3P33Xdnm222ybe+9a1st912W3w2gAdDZAEzygte8IK84AUvuN/6f/3Xfx3CNACT53QhAEAHIgsAoAORBQDQgcgCAOhAZAEAdCCyAAA6cAsHYEILz1g4ra935dFXTuvrTdW5556bq6++Ou985zsn9fzXve51+cu//Mvsueee+dKXvpTjjjsuj370o/OBD3wgn/70p/PhD3942ma94IIL8tCHPjTPeMYzpu01gb5EFvB767DDDsthhx026eefeuqpGx5/8pOfzCc+8YkccMABSZIlS5ZMeb6xLrjggmy33XYiC0aI04XAjLJq1ao85SlPyetf//o89alPzSGHHJK77747l19+efbff//svffeednLXpZbb701SXLwwQfnHe94R5YuXZonPelJufDCC8d93Q9/+MPZc889s/fee+cVr3hFkuRTn/pU3vSmNyVJfvzjH2f//fff8J2J6+8of8EFF+Tggw/OkUcemSc/+cl55Stfmdbahvdevnx5TjjhhFx00UV57Wtfm7e//e254IIL8pKXvCRJsmbNmhxzzDFZuHBh9t5775x99tlJkje+8Y1ZsmRJnvrUp+b444/fMOeCBQty/PHHZ/HixVm4cGGuvfbarFq1KieffHI+9KEPZdGiRRP+HYGZRWQBM851112XY489NldddVV23HHHnH322Xn1q1+d97///Vm5cmUWLlyYv/7rv96w/9q1a/P9738/J5100n3Wj/W+970vl112WVauXJmTTz75ftvf+ta35q1vfWuuvPLK7LrrrvfZdtlll+Wkk07K1Vdfneuvvz7f/e5377P9uOOOy5IlS/K5z30uH/jAB+6z7T3veU922GGHXHnllVm5cmWe85znJEne+973Zvny5Vm5cmX+5V/+JStXrtzwnJ133jkrVqzIG9/4xpx44olZsGBB3vCGN+Qv/uIvcvnll+fAAw98cP+DAkMhsoAZZ/fdd8+iRYuSJE972tPy4x//OL/+9a/zrGc9K0ly9NFH5zvf+c6G/Y844ogN+65atWrc19x7773zyle+Mp/97Gczd+79r5T43ve+l5e//OVJkj/90z+9z7alS5dm1113zUMe8pAsWrRowvcYz7e+9a0ce+yxG5Z32mmnJMkXv/jFLF68OPvuu2+uuuqqXH311Q/q7wPMfCILmHEe9rCHbXg8Z86c/PrXv96s/efMmZO1a9cmSY455pgsWrQohx56aJLkq1/9ao499tisWLEi++2334b9JjPPg3nueH7yk5/kxBNPzLe//e2sXLkyL37xi3PPPfc84N8HGD0iC5jxdthhh+y0004brkX6zGc+s+Go1kROP/30XH755TnvvPPyu9/9LjfccEOe/exn5/3vf39uu+22rFmz5j7777///huul/rCF74wbbM///nPz8c+9rENy7feemtuv/32bLvtttlhhx3yi1/8Il/72tc2+Trbb7997rjjjmmbC+jPpwuBCc2kWy6cccYZecMb3pC77rorT3jCE3L66adv9nPvvffevOpVr8ptt92W1lre8pa3ZMcdd7zPPieddFJe9apX5b3vfW9e+MIXZocddpiWud/1rnfl2GOPzV577ZU5c+bk+OOPzxFHHJF99903T37yk7Pbbrvlmc985iZf56UvfWmOPPLIfOUrX8lHPvIR12XBCKj1n5KZ
SZYsWdKWL18+7DFmlAXv/OqwR2CErHrfiyf1vGuuuSZPecpTpnma0XDXXXdlm222SVXlC1/4Qs4888x85StfGfZYm22y/+z8buHBmOzvltmuqi5trd3vvi2OZAEkufTSS/OmN70prbXsuOOOOe2004Y9EjDiRBZAkgMPPDBXXHHFsMcAZhEXvgP3MRMvIeCB+WcGM5PIAjbYeuutc8stt/g/7RHSWsstt9ySrbfeetijABtxuhDYYNddd83q1atz8803D3sUHoStt976fnepB4ZPZAEbbLXVVtl9992HPQbArDCl04VV9cKq+mFV/aiq3jnO9odV1VmD7RdX1YKpvB8AwKiYdGRV1ZwkH0vyoiR7JjmqqvbcaLfXJrm1tfafknwoyfsn+34AAKNkKkeylib5UWvt+tbab5J8IcnhG+1zeJIzBo//Lslzq6qm8J4AACNhKtdkPS7JDWOWVyd5+kT7tNbWVtVtSR6V5Jcbv1hVLUuybLC4pqp+OIXZ+P2xc8b59+n3XTlmDFPld8s4/G6Z0OPHWzljLnxvrZ2S5JRhz8Foqarl432VAcBU+N3CdJjK6cIbk+w2ZnnXwbpx96mquUl2SHLLFN4TAGAkTCWyLknyxKravaoemuQVSc7daJ9zkxw9eHxkkn9q7nIIAPwemPTpwsE1Vm9K8o0kc5Kc1lq7qqpOSLK8tXZukk8m+UxV/SjJr7IuxGA6OcUM9OB3C1NWDiwBAEw/310IANCByAIA6EBkAQB0ILIYKVX11qp6RK3zyapaUVWHDHsuANiYyGLU/JfW2u1JDkmyU5I/S/K+4Y4EzAZVtcvgP96+Nljes6peO+y5GF0ii1Gz/rsvD03ymdbaVWPWAUzFp7LutkSPHSz/nyRvG9YwjD6Rxai5tKq+mXWR9Y2q2j7J74Y8EzA77Nxa+2IGv1Naa2uT3DvckRhlM+a7C2EzvTbJoiTXt9buqqpHJTlmuCMBs8Sdg98pLUmqav8ktw13JEaZyGIkVNXijVY9ocpZQmBa/VXWfR3cH1bVd5PMS/Ly4Y7EKHPHd0ZCVf3zA2xurbXnbLFhgFmrquYm2SPrrvX8YWvtt0MeiREmsgAgSVX9OMkHWmsnj1n3j621lwxxLEaY04WMnKraK8meSbZev6619unhTQTMEr9N8uyqenqSP2+t/SbJ44Y8EyPMpwsZKVV1fJKPDH6eneRvkhw21KGA2eKu1tqfJLkmyYVVNT+Di+BhMhzJYtQcmWSfJJe11o6pql2SfHbIMwGzQyVJa+1vqmpFkm8meeRwR2KUiSxGzd2ttd9V1dqqekSSm5LsNuyhgFnhuPUPWmvfqqoXJDl6iPMw4kQWo2Z5Ve2Y5BNJLk2yJsn3hjoRMNKq6smttWuT3DjO7WL+cRgzMTv4dCEjq6oWJHlEa23lsGcBRldVndJaWzbmVjH3+T9Gt4hhskQWI6eqDkty0GDxX1pr/zDMeYDRVlVLk/y0tfbvg+Wjk/znJKuSvLu19qshjscI8+lCRkpVvS/JW5NcPfh5S1X9j+FOBYy4k5P8Jkmq6qAk/zPJGVn3lTqnDHEuRpwjWYyUqlqZZFFr7XeD5TlZ90nDvYc7GTCqquqK1to+g8cfS3Jza+3dg+XLW2uLhjgeI8yRLEbRjmMe7zCsIYBZY87g63SS5LlJ/mnMNh8QY9L8y8Oo+Z9JLhtcoFpZd23WO4c7EjDizkzyL1X1yyR3J7kwSarqP2XdKUOYFKcLGTlV9Zgk+w0Wv7/+YlWAyaqq/ZM8Jsk3W2t3DtY9Kcl2rbUVQx2OkSWyGAmDa6+2aa2tGSzvn+Shg82XtdbuGNpwADAOkcVIqKoTk9zUWvubwfL1SX6QZJskK1pr7xjmfACwMddkMSqem/84RZgkt7XWDquqyuD6CQCYSXy6kFHxkNba2jHL70iStu5Q7HbDGQkAJiayGBUPrart1y+01r6ZJFW1Q5KthzYVAExAZDEqPpHkrKqav35FVT0+6z56
ferQpgKACbgmi5HQWvtgVd2V5KKq2nawek2S97XWPj7E0QBgXD5dyMhZf9pwvNs2VNXRrbUztvxUAHBfIotZpapWtNYWD3sOAHBNFrNNDXsAAEhEFrOPQ7MAzAgii9nGkSwAZgSRxWzz3WEPAACJC98ZEVX1lw+0vbX2wS01CwBsDvfJYlSsv9v7Hln3HYbnDpZfmuT7Q5kIAB6AI1mMlKr6TpIXr79H1uCeWV9trR003MkA4L5ck8Wo2SXJb8Ys/2awDgBmFKcLGTWfTvL9qjpnsPzHSdzhHYAZx+lCRk5VLU5y4GDxO621y4Y5DwCMx+lCRtHDk9zeWvv/kqyuqt2HPRAAbMyRLEZKVR2fZEmSPVprT6qqxyb5UmvtmUMeDQDuw5EsRs3LkhyW5M4kaa39LP9xewcAmDFEFqPmN23d4deWJFW17ZDnAYBxiSxGzRer6n8n2bGqXp/kW0k+MeSZAOB+XJPFyKiqSrJrkicnOSTrvgz6G62184c6GACMQ2QxUqrqytbawmHPAQCb4nQho2ZFVe037CEAYFMcyWKkVNW1SZ6YZFXWfcKwkrTW2t7DnAsANiayGClV9fjx1rfW/m1LzwIAD8TpQkbKIKZ2S/KcweO74t9jAGYgR7IYKe74DsCocASAUeOO7wCMBJHFqHHHdwBGgshi1LjjOwAjwTVZjJyqen7c8R2AGU5kMVKq6i+TnNVau3HYswDAA3G6kFGzfZJvVtWFVfWmqtpl2AMBwHgcyWIkVdXeSf4kyX9Osrq19rwhjwQA9+FIFqPqpiT/nuSWJH8w5FkA4H5EFiOlqv5rVV2Q5NtJHpXk9b63EICZaO6wB4AHabckb2utXT7sQQDggbgmi5FTVfskOXCweGFr7YphzgMA43G6kJFSVW9J8rmsuw7rD5J8tqrePNypAOD+HMlipFTVyiR/1Fq7c7C8bZLvuS4LgJnGkSxGTSW5d8zyvYN1ADCjuPCdUXN6kour6pzB8h8n+eTwxgGA8TldyMipqsVJDhgsXthau2yY8wDAeEQWI6Gq9kuyc2vtaxutPzTJL1prlw5nMgAYn2uyGBXvT3L1OOuvSvKBLTwLAGySyGJUbN9a+7eNVw7W7TyEeQDgAYksRsVOD7Dt4VtsCgDYTCKLUfGtqnpvVW24XUOtc0KSfxriXAAwLhe+MxIGNx09NcnSJJcPVu+TZHmS17XW1gxpNAAYl8hipFTVE5I8dbB4VWvt+o22P7W1dtWWnwwA7ktkMatU1YrW2uJhzwEArslitvEVOwDMCCKL2cahWQBmBJEFANCByGK2+c2wBwCAxIXvjIjBl0JPqLW2YkvNAgCbQ2QxEqrqnx9gc2utPWeLDQMAm0FkAQB0MHfYA8CDVVV7Jdkzydbr17XWPj28iQDg/hzJYqRU1fFJDs66yDovyYuSXNRaO3KYcwHAxny6kFFzZJLnJvn31toxWff9hTsMdyQAuD+Rxai5u7X2uyRrq+oRSW5KstuQZwKA+3FNFqNmeVXtmOQTSS5NsibJ94Y6EQCMwzVZjKyqWpDkEa21lcOeBQA25nQhI6Wqvr3+cWttVWtt5dh1ADBTOF3ISKiqrZM8PMnOVbVTkhpsekSSxw1tMACYgMhiVPx5krcleWySsV+hc3uSjw5jIAB4IK7JYqRU1Ztbax8Z9hwAsCkii5FSVQ9N8oYkBw1WXZDkf7fWfju0oQBgHCKLkVJVpybZKskZg1V/luTe1trrhjcVANyfyGIkVNXc1traqrqitbbPRtvutw4Ahs0tHBgV3x/8eW9V/eH6lVX1hCT3DmckAJiYTxcyKtbfsuG/Jfnnqrp+sLwgyTFDmQgAHoDThYyEqlqd5IODxW2SzBk8vjfrvs/wg+M+EQCGxJEsRsWcJNvlP45orTc3yfZbfhwAeGCOZDESqmpFa23xsOcAgM3lwndGxcZHsABgRnMki5FQVY9srf1q2HMAwOYSWQAAHThdCADQgcgCAOhAZAGzRlUt
qKofDHsOgERkAQB0IbKAWamqnlBVl1XV06vq61V1aVVdWFVPrqrtq+onVbXVYN9HjF0GmA4iC5h1qmqPJGcneU2S/5Hkza21p2Xdd1/+bWvtjiQXJHnx4CmvSPLl1tpvt/y0wGzlFg7ArFFVC5JcnOTWJEck+WmSm5P8cMxuD2utPaWqnpnkv7fWDq+q7yV5fWvN9VzAtPHdhcBsc1vWxdUBSb6Q5NettUUb79Ra++7gQvmDk8wRWMB0c7oQmG1+k+RlSV6d5CVJflJVL0+SWmefMft+Osnnk5y+xacEZj2RBcw6rbU7sy6w/iLJWUleW1VXJLkqyeFjdv1ckp2SnLnFhwRmPddkAb+3qurIJIe31v5s2LMAs49rsoDfS1X1kSQvSnLosGcBZidHsgAAOnBNFgBAByILAKADkQUA0IHIAgDoQGQBAHQgsgAAOvj/AePIhRl66p49AAAAAElFTkSuQmCC",
      "text/plain": [
       "<Figure size 720x720 with 1 Axes>"
      ]
     },
     "metadata": {
      "needs_background": "light"
     },
     "output_type": "display_data"
    }
   ],
   "source": [
    "# Stacked bar chart: one bar per 'key' value, with the other plotted columns of dd stacked.\n",
    "# The trailing ';' keeps the returned Axes repr out of the cell output.\n",
    "dd.plot.bar(x='key', figsize=(10, 10), stacked=True);"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.7.6"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 4
}
